setwd("/Users/Kadeem Gilbert/Desktop/Microbiome project/R_analysis")

### Start by analyzing eukaryotic datasets 

eotu <- read.table("otu_table_Eukaryota_fixed3.txt",sep="\t",head=T,row.names=2) #read in OTU table
eotu.notax <- eotu[,2:ncol(eotu)] # remove taxonomy
summary(rowSums(eotu.notax))
summary(colSums(eotu.notax))
eotu.t <- t(eotu.notax) # transpose rows and columns
eotu.t <- eotu.t[order(row.names(eotu.t)),] #order row names

mdat <- read.table("MicrobiomeMetadata.txt", sep="\t", head=T, row.names=1) #get metadata
mdat <- mdat[order(row.names(mdat)),] # order row names
emdat <- subset(mdat, row.names(mdat) %in% row.names(eotu.t)) # subset, because metadata had more samples
rownames(emdat) == rownames(eotu.t) # sanity check

summary(rowSums(eotu.t))
summary(colSums(eotu.t))
eotu.t1K <- eotu.t[rowSums(eotu.t) > 1000,] # keep only samples with at least 1K seqs
summary(rowSums(eotu.t1K))
summary(colSums(eotu.t1K))
eotu.t1K <- eotu.t1K[,colSums(eotu.t1K) > 0] 


if (!require("vegan")) {install.packages("vegan"); require("vegan")}

eotu.t1K.r <- rrarefy(eotu.t1K,1103) # rarefy to ~1K for all samples

summary(colSums(eotu.t1K.r))
eotu.t1K.r <- eotu.t1K.r[,colSums(eotu.t1K.r) > 0] # get rid of OTUs with 0 observations in rarefied table
summary(rowSums(eotu.t1K.r))

emdat1 <- subset(emdat, row.names(emdat) %in% row.names(eotu.t1K.r)) # Have to trim down the  metadata because we got rid of low-abundance samples

### Subset to just Mindanao

eotu.m <- subset(eotu.t, emdat$Project=="Mindanao")
emdat.m <- subset(emdat, row.names(emdat) %in% row.names(eotu.m)) 

emdat.m$Project <- emdat.m$Project[drop=T] # drop any categories that are no longer in the subsetted metadata file
emdat.m$Species <- emdat.m$Species[drop=T]
emdat.m$Canopy <- emdat.m$Canopy[drop=T]
emdat.m$Width <- emdat.m$Width[drop=T]
emdat.m$Length <- emdat.m$Length[drop=T]
emdat.m$Volume <- emdat.m$Volume[drop=T]
emdat.m$pH <- emdat.m$pH[drop=T]
emdat.m$Elevation <- emdat.m$Elevation[drop=T]

summary(rowSums(eotu.m))
eotu.m <- eotu.m[rowSums(eotu.m) > 1000,] 
summary(colSums(eotu.m))
eotu.m <- eotu.m[,colSums(eotu.m) > 0] # get rid of OTUs with 0 observations in rarefied table

emdat.m <- subset(emdat.m, row.names(emdat.m) %in% row.names(eotu.m)) # Have to trim down the  metadata because we got rid of low-abundance samples

eotu.m.r <- rrarefy(eotu.m,1103) # rarefy to 1K for all samples
summary(colSums(eotu.m.r))
eotu.m.r <- eotu.m.r[,colSums(eotu.m.r) > 0] 


##### Repeat everything with bacteria

botu <- read.table("otu_table_Bacteria_fixed3.txt",sep="\t",head=T,row.names=2) #read in OTU table
botu.notax <- botu[,2:ncol(botu)] # remove taxonomy
summary(rowSums(botu.notax))
summary(colSums(botu.notax))
botu.t <- t(botu.notax) # transpose rows and columns
botu.t <- botu.t[order(row.names(botu.t)),] #order row names

#mdat <- read.table("Metadata_HP_M.txt", sep="\t", head=T, row.names=1) #get metadata
#mdat <- mdat[order(row.names(mdat)),] # order row names
bmdat <- subset(mdat, row.names(mdat) %in% row.names(botu.t)) # subset, because metadata had more samples
rownames(bmdat) == rownames(botu.t) # sanity check

summary(rowSums(botu.t))
summary(colSums(botu.t))
botu.t1K <- botu.t[rowSums(botu.t) > 1000,] # keep only samples with at least 1K seqs
summary(rowSums(botu.t1K))
summary(colSums(botu.t1K))
botu.t1K <- botu.t1K[,colSums(botu.t1K) > 0] 


#if (!require("vegan")) {install.packages("vegan"); require("vegan")}

botu.t1K.r <- rrarefy(botu.t1K,1311) # rarefy to ~1K for all samples

summary(colSums(botu.t1K.r))
botu.t1K.r <- botu.t1K.r[,colSums(botu.t1K.r) > 0] # get rid of OTUs with 0 observations in rarefied table
summary(rowSums(botu.t1K.r))

bmdat1 <- subset(bmdat, row.names(bmdat) %in% row.names(botu.t1K.r)) # Have to trim down the  metadata because we got rid of low-abundance samples

### Subset to Mindanao

botu.m <- subset(botu.t, bmdat$Project=="Mindanao")
bmdat.m <- subset(bmdat, row.names(bmdat) %in% row.names(botu.m)) 

bmdat.m$Project <- bmdat.m$Project[drop=T] # drop any categories that are no longer in the subsetted metadata file
bmdat.m$Species <- bmdat.m$Species[drop=T]
bmdat.m$Canopy <- bmdat.m$Canopy[drop=T]
bmdat.m$Width <- bmdat.m$Width[drop=T]
bmdat.m$Length <- bmdat.m$Length[drop=T]
bmdat.m$Volume <- bmdat.m$Volume[drop=T]
bmdat.m$pH <- bmdat.m$pH[drop=T]
bmdat.m$Elevation <- bmdat.m$Elevation[drop=T]
bmdat.m$Distance <- bmdat.m$Distance[drop=T]

summary(rowSums(botu.m))
# min is 7032
summary(colSums(botu.m))
botu.m <- botu.m[,colSums(botu.m) > 0] # get rid of OTUs with 0 observations in rarefied table

botu.m.r <- rrarefy(botu.m,7032) # rarefy to 1K for all samples
summary(colSums(botu.m.r))
botu.m.r <- botu.m.r[,colSums(botu.m.r) > 0] 

### Phyloseq

## Install phyloseq
require(phyloseq)
if (!require("picante")) {install.packages("picante"); require("picante")}

eotu.tree <- read.tree("rep_set_18S.tre") ## rename the 18S tree and make sure it is in your current directory, then read it in with picante

row.names(eotu.m.r) == row.names(emdat.m)

## Prune tree and put into format for phyloseq
eotu.m.tree <- prune.sample(eotu.m.r, eotu.tree)
OTU.eotu.m = otu_table(as.matrix(eotu.m.r), taxa_are_rows = FALSE)
SAM.eotu.m = sample_data(emdat.m)
eotu.m.physeq <-merge_phyloseq(phyloseq(OTU.eotu.m),SAM.eotu.m,eotu.m.tree)

## Measure Unifrac distance and run NMDS
eotu.m.uu.dist <- distance(eotu.m.physeq,"uUniFrac")
eotu.m.uu.nmds <- metaMDS(eotu.m.uu.dist, trymax=800)

## Plot NMDS by pH with ordisurf 
emdat.m.pH <- as.factor(emdat.m$pH)
customcol6 <- c("red","orange","yellow","palegreen2","dodgerblue2","purple4")

plot(eotu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes eukaryotic community by pH", 
     col= rainbow(10)[emdat.m.pH],
     pch =19)
legend(locator(1), 
       legend=c("1.5","2","2.5","3", "3.5", "4", "4.5", "5", "5.5", "6"),
       col= rainbow(10),
       pch=19,
       cex=0.75,
       y.intersp=0.37,
       x.intersp=0.37,
       inset=0,
       bty = "n")
ordisurf(eotu.m.uu.nmds,emdat.m$pH, add=T, col="darkgray")

plot(eotu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes eukaryotic community by elevation",
     pch =19)
ordisurf(eotu.m.uu.nmds,emdat.m$Elevation, add=T, col="darkgray")

plot(eotu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes eukaryotic community by canopy coverage", 
     col= c("blue","red","palegreen")[emdat.m$Canopy],
     pch=19)
legend("bottomleft", 
       legend=c("closed","semi-open","open"),
       col= c("blue","red","palegreen"),
       pch=19,
       cex=0.95,
       y.intersp=0.37,
       x.intersp=0.37,
       inset=0,
       bty = "n")
ordisurf(eotu.m.uu.nmds,emdat.m$Percent_Open, add=T, col="darkgray")

## Run mantel test and ordisurf on pH and volume
min(emdat.m$pH)
max(emdat.m$pH)
eotu.m.pH <- vegdist(emdat.m$pH,method="euclidean") 
eotu.mpH.man <- mantel(eotu.m.uu.dist,eotu.m.pH, permutations=999)
eotu.mpH.man

eotu.m.vol <- vegdist(emdat.m$Fluid_Volume,method="euclidean") 
eotu.mvol.man <- mantel(eotu.m.uu.dist,eotu.m.vol, permutations=999)
eotu.mvol.man

eotu.m.ele <- vegdist(emdat.m$Elevation,method="euclidean") 
eotu.mele.man <- mantel(eotu.m.uu.dist,eotu.m.ele, permutations=999)
eotu.mele.man

eotu.m.can <- vegdist(emdat.m$Percent_Open,method="euclidean") 
eotu.mcan.man <- mantel(eotu.m.uu.dist,eotu.m.can, permutations=999)
eotu.mcan.man

eotu.m.geo <- vegdist(emdat.m$Distance,method="euclidean") 
eotu.mgeo.man <- mantel(eotu.m.uu.dist,eotu.m.geo, permutations=999)
eotu.mgeo.man

eotu.m.len <- vegdist(emdat.m$Length,method="euclidean") 
eotu.mlen.man <- mantel(eotu.m.uu.dist,eotu.m.len, permutations=999)
eotu.mlen.man

eotu.m.wid <- vegdist(emdat.m$Width,method="euclidean") 
eotu.mwid.man <- mantel(eotu.m.uu.dist,eotu.m.wid, permutations=999)
eotu.mwid.man

#PERMANOVA for morph/color

eotu.m.ad1 <- adonis(eotu.m.uu.dist ~ Morph, data=emdat.m)
eotu.m.ad1
eotu.m.ad2 <- adonis(eotu.m.uu.dist ~ Pitcher_Color, data=emdat.m)
eotu.m.ad2

emdat.m$Morph<-factor(emdat.m$Morph, levels=c("Lower", "Upper"))
emdat.m$Morph
plot(eotu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes eukaryotic community by pitcher morph", 
     col= c("purple","orange")[emdat.m$Morph],
     pch=19)
legend(locator(1), 
       legend=c("Lower","Upper"),
       col= c("purple","orange"),
       pch=19,
       cex=0.95,
       y.intersp=0.37,
       x.intersp=0.37,
       inset=0,
       bty = "n")

plot(eotu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes eukaryotic community by pitcher color", 
     col= c("black","green","red")[emdat.m$Pitcher_Color],
     pch=19)
legend("topright", 
       legend=c("Green","Red"),
       col= c("green","red"),
       pch=19,
       cex=0.95,
       y.intersp=0.37,
       x.intersp=0.37,
       inset=0,
       bty = "n")

#Repeat everything for bacteria
botu.tree <- read.tree("rep_set_16S.tre") ## rename the 16S tree and make sure it is in your current directory, then read it in with picante

row.names(botu.m.r) == row.names(bmdat.m)

## Prune tree and put into format for phyloseq
botu.m.tree <- prune.sample(botu.m.r, botu.tree)
OTU.botu.m = otu_table(as.matrix(botu.m.r), taxa_are_rows = FALSE)
SAM.botu.m = sample_data(bmdat.m)
botu.m.physeq <-merge_phyloseq(phyloseq(OTU.botu.m),SAM.botu.m,botu.m.tree)

## Measure Unifrac distance and run NMDS
botu.m.uu.dist <- distance(botu.m.physeq,"uUniFrac")
botu.m.uu.nmds <- metaMDS(botu.m.uu.dist, trymax=800)

## Plot NMDS by pH with ordisurf 
bmdat.m.pH <- as.factor(bmdat.m$pH)
customcol6 <- c("red","orange","yellow","palegreen2","dodgerblue2","purple4")

plot(botu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes bacterial community by pH", 
     col= rainbow(10)[bmdat.m.pH],
     pch =19)
legend(locator(1), 
       legend=c("1.5","2","2.5","3", "3.5", "4", "4.5", "5", "5.5", "6"),
       col= rainbow(10),
       pch=19,
       cex=0.75,
       y.intersp=0.37,
       x.intersp=0.37,
       inset=0,
       bty = "n")
ordisurf(botu.m.uu.nmds,bmdat.m$pH, add=T, col="darkgray")

plot(botu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes bacterial community by elevation", 
     pch =19)
ordisurf(botu.m.uu.nmds,bmdat.m$Elevation, add=T, col="darkgray")

plot(botu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes bacterial community by canopy coverage", 
     col= c("blue","red","palegreen")[bmdat.m$Canopy],
     pch=19)
legend("bottomleft", 
       legend=c("closed","semi-open","open"),
       col= c("blue","red","palegreen"),
       pch=19,
       cex=0.95,
       y.intersp=0.37,
       x.intersp=0.37,
       inset=0,
       bty = "n")
ordisurf(botu.m.uu.nmds,bmdat.m$Percent_Open, add=T, col="darkgray")

## Run mantel test and ordisurf on pH and volume
min(bmdat.m$pH)
max(bmdat.m$pH)
botu.m.pH <- vegdist(bmdat.m$pH,method="euclidean") 
botu.mpH.man <- mantel(botu.m.uu.dist,botu.m.pH, permutations=999)
botu.mpH.man

botu.m.vol <- vegdist(bmdat.m$Fluid_Volume,method="euclidean") 
botu.mvol.man <- mantel(botu.m.uu.dist,botu.m.vol, permutations=999)
botu.mvol.man

botu.m.ele <- vegdist(bmdat.m$Elevation,method="euclidean") 
botu.mele.man <- mantel(botu.m.uu.dist,botu.m.ele, permutations=999)
botu.mele.man

botu.m.can <- vegdist(bmdat.m$Percent_Open,method="euclidean") 
botu.mcan.man <- mantel(botu.m.uu.dist,botu.m.can, permutations=999)
botu.mcan.man

botu.m.geo <- vegdist(bmdat.m$Distance,method="euclidean") 
botu.mgeo.man <- mantel(botu.m.uu.dist,botu.m.geo, permutations=999)
botu.mgeo.man

botu.m.len <- vegdist(bmdat.m$Length,method="euclidean") 
botu.mlen.man <- mantel(botu.m.uu.dist,botu.m.len, permutations=999)
botu.mlen.man

botu.m.wid <- vegdist(bmdat.m$Width,method="euclidean") 
botu.mwid.man <- mantel(botu.m.uu.dist,botu.m.wid, permutations=999)
botu.mwid.man

#PERMANOVA for morph/color

botu.m.ad1 <- adonis(botu.m.uu.dist ~ Morph, data=bmdat.m)
botu.m.ad1
botu.m.ad2 <- adonis(botu.m.uu.dist ~ Pitcher_Color, data=bmdat.m)
botu.m.ad2


bmdat.m$Morph<-factor(bmdat.m$Morph, levels=c("Lower", "Upper"))
bmdat.m$Morph
plot(botu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes bacterial community by pitcher morph", 
     col= c("purple","orange")[bmdat.m$Morph],
     pch=19)
legend("bottomright", 
       legend=c("Lower","Upper"),
       col= c("purple","orange"),
       pch=19,
       cex=0.95,
       y.intersp=0.37,
       x.intersp=0.37,
       inset=0,
       bty = "n")

plot(botu.m.uu.nmds$points, xlab="NMDS Axis 1", ylab="NMDS Axis 2", 
     main="Nepenthes eukaryotic community by pitcher color", 
     col= c("black","green","red")[bmdat.m$Pitcher_Color],
     pch=19)
legend("topright", 
       legend=c("Green","Red"),
       col= c("green","red"),
       pch=19,
       cex=0.95,
       y.intersp=0.37,
       x.intersp=0.37,
       inset=0,
       bty = "n")


